1. Differential expression: DG>HP, DG>brain
  2. Add human database with SGZ
  3. Add mouse microarray/RNAseq

Load Data

library(readxl)
library(dplyr)
library(ggplot2)
library(scales)
library(ade4)
library(plotrix)
library(stargazer)

# *** CHANGE GENE NAME ***
gene_name <- "EPHA7"

microarray <- read_excel("/Users/schilder/Desktop/Dissertation/Gene_Expression/ABA_Microarray_data.xlsx", na = "NA", sheet = paste(gene_name))
  str(microarray)
## Classes 'tbl_df', 'tbl' and 'data.frame':    2892 obs. of  17 variables:
##  $ Species                         : chr  "Human" "Human" "Human" "Human" ...
##  $ donor_id                        : num  9861 9861 9861 9861 9861 ...
##  $ donor_name                      : chr  "H0351.2001" "H0351.2001" "H0351.2001" "H0351.2001" ...
##  $ donor_age                       : chr  "24 years" "24 years" "24 years" "24 years" ...
##  $ donor_color                     : chr  "EC891D" "EC891D" "EC891D" "EC891D" ...
##  $ structure_id                    : num  4053 4078 4888 4047 4041 ...
##  $ structure_name                  : chr  "anterior orbital gyrus" "frontal operculum" "frontal pole" "gyrus rectus" ...
##  $ structure_abbreviation          : chr  "AOrG" "fro" "FP" "GRe" ...
##  $ structure_color                 : chr  "E8BF59" "E8C159" "E8C359" "E8C459" ...
##  $ top_level_structure_id          : num  4009 4009 4009 4009 4009 ...
##  $ top_level_structure_name        : chr  "frontal lobe" "frontal lobe" "frontal lobe" "frontal lobe" ...
##  $ top_level_structure_abbreviation: chr  "FL" "FL" "FL" "FL" ...
##  $ top_level_structure_color       : chr  "E8CD59" "E8CD59" "E8CD59" "E8CD59" ...
##  $ 1057236                         : num  0.228 -0.213 -0.199 -0.24 -0.378 ...
##  $ 1057237                         : num  -0.494 0.22 -0.102 -0.281 0.417 ...
##  $ 1065061                         : num  -0.684 -0.577 -0.624 -0.743 -0.857 ...
##  $ Expression                      : num  -0.316 -0.19 -0.308 -0.421 -0.273 ...
RNAseq <- read_excel("/Users/schilder/Desktop/Dissertation/Gene_Expression/ABA_RNAseq_data.xlsx", na = "NA", sheet = paste(gene_name))
  str(RNAseq)
## Classes 'tbl_df', 'tbl' and 'data.frame':    524 obs. of  13 variables:
##  $ donor_id                        : num  13058 13058 13058 13058 13058 ...
##  $ donor_name                      : chr  "H376.IIA.51" "H376.IIA.51" "H376.IIA.51" "H376.IIA.51" ...
##  $ donor_age                       : chr  "8 pcw" "8 pcw" "8 pcw" "8 pcw" ...
##  $ donor_color                     : chr  "0000FF" "0000FF" "0000FF" "0000FF" ...
##  $ structure_id                    : num  10173 10185 10278 10194 10291 ...
##  $ structure_name                  : chr  "dorsolateral prefrontal cortex" "ventrolateral prefrontal cortex" "anterior (rostral) cingulate (medial prefrontal) cortex" "orbital frontal cortex" ...
##  $ structure_abbreviation          : chr  "DFC" "VFC" "MFC" "OFC" ...
##  $ structure_color                 : chr  "D4B235" "C2A335" "E26880" "E2D4A7" ...
##  $ top_level_structure_id          : num  10153 10153 10153 10153 10153 ...
##  $ top_level_structure_name        : chr  "neural plate" "neural plate" "neural plate" "neural plate" ...
##  $ top_level_structure_abbreviation: chr  "NP" "NP" "NP" "NP" ...
##  $ top_level_structure_color       : chr  "D7D8D8" "D7D8D8" "D7D8D8" "D7D8D8" ...
##  $ Expression                      : num  5.09 5.43 4.73 3.61 5.42 ...

Microarray

Macaque Microarray

Adult Macaque microarray

macaque_adult <- filter(microarray, Species == "Macaque", donor_age == "48 mo") %>% droplevels()

# Top structures: Interindividual variation
ggplot(data = macaque_adult) + geom_boxplot(aes(x=donor_name, y=Expression, fill=top_level_structure_name)) + labs(title=paste("Macaque",gene_name,"Expression:\nInterindividual Variation"), x="Donor ID", y="Expression Level")

# Top structures: Average
ggplot(data = macaque_adult) + geom_boxplot(aes(x=top_level_structure_name, y=Expression, fill=top_level_structure_name)) + 
  labs(title=paste("Adult Macaque",gene_name,"Expression: \nAverage"), x="Structure Name", y="Expression Level") + scale_x_discrete(labels=c("Amygdaloid Complex", "Basal ganglia", "Total Brain", "Hippocampal formation")) + 
  theme(legend.position="none", axis.text.x = element_text(color="black"))

# Hippocampal substructures: Interindividual Variaton
ggplot(data = filter(macaque_adult, top_level_structure_name == "hippocampal cortex (hippocampal formation)")) + 
  geom_bar(aes(x=donor_name, y=Expression, fill=structure_name), stat="identity", position="dodge") +
  labs(title=paste("Adult Macaque",gene_name,"Expression: Hippocampal Substructures: \nInterindividual Variation"), x="Substructure", y="Expression Level")

# Hippocampal substructures: Average
ggplot(data = filter(macaque_adult, top_level_structure_name == "hippocampal cortex (hippocampal formation)")) + geom_boxplot(aes(x=structure_name, y=Expression, fill=structure_name)) +
  labs(title=paste("Adult Macaque",gene_name,"Expression: Hippocampal Substructures: \nAverage"), x="Substructure", y="Expression Level") +   
  theme(legend.position="none", legend.key.size=grid::unit(.4, "cm"), legend.text=element_text(size=7), axis.text.x = element_text(angle=45, color="black", size=10, hjust=1))

Adult Human Microarray

Microarray: Interspecies Comparison

Prepare data

# Macaque: For postnatal brains, RIGHT hems were used for microarray and left hems were used for ISH. 
# Human: Both left and right included separately for humans.

# Duplicate structure_names
microarray$structure_name2 <- microarray$structure_name

# Change names of macaque subregions to be more general
  # ** Assumes left and right hemisphere are not significantly different
microarray$structure_name2 <- gsub("granular layer of dentate gyrus (cortex)", "DG", microarray$structure_name2, fixed=TRUE)
microarray$structure_name2 <- gsub("subgranular zone of dentate gyrus (cortex)", "DG", microarray$structure_name2, fixed=TRUE)
microarray$structure_name2 <- gsub("CA4 region", "CA4", microarray$structure_name2)
microarray$structure_name2 <- gsub("polyform layer of dentate gyrus (cortex)", "DG", microarray$structure_name2, fixed=TRUE)
microarray$structure_name2 <- gsub("stratum pyramidale of CA3", "CA3", microarray$structure_name2)
microarray$structure_name2 <- gsub("stratum pyramidale of CA2", "CA2", microarray$structure_name2)
microarray$structure_name2 <- gsub("stratum pyramidale of CA1", "CA1", microarray$structure_name2)
microarray$structure_name2 <- gsub("stratum oriens of CA1", "CA1", microarray$structure_name2)
microarray$structure_name2 <- gsub("stratum radiatum of CA1", "CA1", microarray$structure_name2)
microarray$structure_name2 <- gsub("subiculum", "Sub", microarray$structure_name2)
# Repeat for human subregions
microarray$structure_name2 <- gsub("CA1 field, left", "CA1", microarray$structure_name2)
  microarray$structure_name2 <- gsub("CA1 field, right", "CA1", microarray$structure_name2)
  microarray$structure_name2 <- gsub("CA1 field", "CA1", microarray$structure_name2)

microarray$structure_name2 <- gsub("CA2 field, left", "CA2", microarray$structure_name2)
  microarray$structure_name2 <- gsub("CA2 field, right", "CA2", microarray$structure_name2)
  microarray$structure_name2 <- gsub("CA2 field", "CA2", microarray$structure_name2)

microarray$structure_name2 <- gsub("CA3 field, left", "CA3", microarray$structure_name2)
  microarray$structure_name2 <- gsub("CA3 field, right", "CA3", microarray$structure_name2)
  microarray$structure_name2 <- gsub("CA3 field", "CA3", microarray$structure_name2)

microarray$structure_name2 <- gsub("CA4 field, left", "CA4", microarray$structure_name2)
  microarray$structure_name2 <- gsub("CA4 field, right", "CA4", microarray$structure_name2)
  microarray$structure_name2 <- gsub("CA4 field", "CA4", microarray$structure_name2)

microarray$structure_name2 <- gsub("dentate gyrus, left", "DG", microarray$structure_name2) 
  microarray$structure_name2 <- gsub("dentate gyrus, right", "DG", microarray$structure_name2)
  microarray$structure_name2 <- gsub("dentate gyrus", "DG", microarray$structure_name2)

microarray$structure_name2 <- gsub("Sub, left", "Sub", microarray$structure_name2)
  microarray$structure_name2 <- gsub("Sub, right", "Sub", microarray$structure_name2)
  microarray$structure_name2 <- gsub("subiculum", "Sub", microarray$structure_name2)
  
# rescale macaques and humans to make comparable (**Double check that I'm doing this right)
mac_scaled <- filter(microarray, Species == "Macaque", donor_age=="48 mo")
  mac_scaled$Expression <- rescale(mac_scaled$Expression, c(-1,1))
human_scaled <- filter(microarray, Species=="Human")
  human_scaled$Expression <- rescale(human_scaled$Expression, c(-1,1))
  
combo <- rbind(mac_scaled, human_scaled)
adult_HP <- filter(combo, top_level_structure_name == "hippocampal formation"|top_level_structure_name == "hippocampal cortex (hippocampal formation)", structure_name2 != "NA") %>% droplevels()
unique(adult_HP$structure_name2)
## [1] "DG"  "CA1" "CA2" "CA3" "CA4" "Sub"

Separate adult macaques, then rescale them by themselves. This is because the original macaque data has developmental data, while the original human data is only adults. This difference could cause a bias to make macaques seem like they have lower expression, since many of these genes are highly upregulated during development.

Interspecies Comparisons

Interspecies Plots

Interspecies Analyses

Separate Subfields

# Whole hippocampus
HP_model <- lm(data = filter(adult_HP), Expression ~ Species)
# DG model
DG_model <- lm(data = filter(adult_HP, structure_name2=="DG"), Expression ~ Species)
# CA4 model
CA4_model <- lm(data = filter(adult_HP, structure_name2=="CA4"), Expression ~ Species)
# CA3 model
CA3_model <- lm(data = filter(adult_HP, structure_name2=="CA3"), Expression ~ Species)
# CA2 model
CA2_model <- lm(data = filter(adult_HP, structure_name2=="CA2"), Expression ~ Species)
# CA1 model
CA1_model <- lm(data = filter(adult_HP, structure_name2=="CA1"), Expression ~ Species)
# Sub model
Sub_model <- lm(data = filter(adult_HP, structure_name2=="Sub"), Expression ~ Species)

stargazer(HP_model, DG_model, CA4_model, CA3_model, CA2_model, CA1_model, Sub_model,
          type="html", dep.var.labels = "", dep.var.caption = paste(gene_name, "Expression"), model.numbers = FALSE, column.labels = c("Whole HP", "DG", "CA4", "CA3", "CA2", "CA1", "Sub"), covariate.labels = c("Species", "Intercept"), star.cutoffs=c(0.05, 0.01, 0.001), align = TRUE, notes.align = "l", no.space = TRUE, title="Hippocampal Subfield Expression: Interspecies Comparisons", report="vcsp*", df = FALSE)
Hippocampal Subfield Expression: Interspecies Comparisons
EPHA7 Expression
Whole HP DG CA4 CA3 CA2 CA1 Sub
Species 0.136 -0.236 0.180 0.309 0.046 0.055 0.169
(0.076) (0.124) (0.030) (0.054) (0.053) (0.121) (0.064)
p = 0.081 p = 0.079 p = 0.001*** p = 0.001*** p = 0.423 p = 0.661 p = 0.033*
Intercept 0.234 0.864 0.142 0.033 -0.018 0.171 0.212
(0.050) (0.096) (0.015) (0.031) (0.031) (0.092) (0.037)
p = 0.00002*** p = 0.00000*** p = 0.0001*** p = 0.326 p = 0.583 p = 0.086 p = 0.001***
Observations 64 15 8 9 9 14 9
R2 0.049 0.219 0.860 0.824 0.094 0.017 0.503
Adjusted R2 0.033 0.159 0.837 0.799 -0.036 -0.065 0.433
Residual Std. Error 0.302 0.235 0.036 0.076 0.076 0.224 0.090
F Statistic 3.167 3.654 36.944*** 32.712*** 0.725 0.203 7.097*
Note: p<0.05; p<0.01; p<0.001

Multivariate models

Prepare multi data

library(tidyr)
grouped <- adult_HP %>% spread(key=structure_name2, value=Expression) %>% 
  group_by(donor_name) %>% 
  dplyr::summarise(DG = mean(DG, na.rm=TRUE), 
            #CA4 = mean(CA4, na.rm=TRUE), 
            CA3 = mean(CA3, na.rm=TRUE), 
            CA2 = mean(CA2, na.rm=TRUE),
            CA1 = mean(CA1, na.rm=TRUE),
            Sub = mean(Sub, na.rm=TRUE))
grouped$Species <- grouped$donor_name
grouped$Species <- gsub(pattern = "H0351.1009", replacement = "Human", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "H0351.1012", replacement = "Human", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "H0351.1015", replacement = "Human", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "H0351.1016", replacement = "Human", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "H0351.2001", replacement = "Human", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "H0351.2002", replacement = "Human", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "MMU36322", replacement = "Macaque", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "MMU36358", replacement = "Macaque", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "MMU36468", replacement = "Macaque", grouped$Species, fixed=TRUE)
grouped
## # A tibble: 9 Ă— 7
##   donor_name        DG         CA3         CA2        CA1       Sub
##        <chr>     <dbl>       <dbl>       <dbl>      <dbl>     <dbl>
## 1 H0351.1009 0.7676549 -0.04193689  0.03322052 0.11289682 0.3282819
## 2 H0351.1012 0.8250109  0.03132977 -0.02301046 0.34648982 0.2145720
## 3 H0351.1015 0.7211897  0.01957874 -0.17211518 0.03773942 0.1683526
## 4 H0351.1016 0.9328972  0.05146628  0.06154399 0.22528314 0.2916769
## 5 H0351.2001 1.0000000 -0.01401047 -0.01425627 0.12390100 0.1383747
## 6 H0351.2002 0.9359224  0.15095767  0.00778990 0.18173911 0.1293747
## 7   MMU36322 0.6128473  0.37589844 -0.03767784 0.26293415 0.5037124
## 8   MMU36358 0.6489927  0.41550417  0.05003609 0.19845374 0.3223922
## 9   MMU36468 0.6202029  0.23281956  0.07087704 0.22860128 0.3172907
## # ... with 1 more variables: Species <chr>

Can choose whether or not to include CA4. Could also just combine it with DG.

Multivariate linear model

anova(lm(grouped, formula = DG + CA3 + CA2 + CA1 + Sub ~ Species, na.action=na.omit))
## Analysis of Variance Table
## 
## Response: DG + CA3 + CA2 + CA1 + Sub
##           Df  Sum Sq Mean Sq F value  Pr(>F)  
## Species    1 0.23894 0.23894   4.154 0.08094 .
## Residuals  7 0.40264 0.05752                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation matrices

# Macaques
mac <- filter(grouped, Species=="Macaque") %>% dplyr::select(DG:Sub) %>% na.omit()
mac_cor <- cor(mac, dplyr::select(mac, DG:Sub)) # Correlation matrix
stargazer(mac_cor, type="html")
DG CA3 CA2 CA1 Sub
DG 1 0.512 0.510 -0.933 -0.639
CA3 0.512 1 -0.478 -0.169 0.334
CA2 0.510 -0.478 1 -0.785 -0.988
CA1 -0.933 -0.169 -0.785 1 0.873
Sub -0.639 0.334 -0.988 0.873 1
# Humans
hum <- filter(grouped, Species=="Human") %>% dplyr::select(DG:Sub) %>% na.omit()
hum_cor <- cor(hum) # Correlation matrix
stargazer(hum_cor, type="html")
DG CA3 CA2 CA1 Sub
DG 1 0.345 0.563 0.272 -0.345
CA3 0.345 1 0.094 0.290 -0.470
CA2 0.563 0.094 1 0.483 0.492
CA1 0.272 0.290 0.483 1 0.145
Sub -0.345 -0.470 0.492 0.145 1
# All species
all <- dplyr::select(grouped, DG:Sub) %>% na.omit()
all_cor <- cor(all) # Correlation matrix
stargazer(all_cor, type="html")
DG CA3 CA2 CA1 Sub
DG 1 -0.659 0.054 -0.122 -0.701
CA3 -0.659 1 0.251 0.368 0.606
CA2 0.054 0.251 1 0.435 0.281
CA1 -0.122 0.368 0.435 1 0.372
Sub -0.701 0.606 0.281 0.372 1
mantel.rtest(dist(mac_cor), dist(hum_cor)) # Tests whether 2 covariance matrices differ
## Monte-Carlo test
## Observation: -0.08514423 
## Call: mantel.rtest(m1 = dist(mac_cor), m2 = dist(hum_cor))
## Based on 99 replicates
## Simulated p-value: 0.65
gdata <- gather(grouped, key = "Subfield", value = "Expression", DG:Sub )
  summary(aov(data=gdata, Expression ~ Species)) # Does expression differ across species?
##             Df Sum Sq Mean Sq F value Pr(>F)
## Species      1  0.048 0.04779   0.533  0.469
## Residuals   43  3.853 0.08960
  summary(aov(data=gdata, Expression ~ Subfield)) # Does expression differe across subfields?
##             Df Sum Sq Mean Sq F value  Pr(>F)    
## Subfield     4  3.273  0.8182   52.15 2.4e-15 ***
## Residuals   40  0.628  0.0157                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    TukeyHSD(aov(data=adult_HP, Expression ~ structure_name2)) # Do subfields differ from one another?
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Expression ~ structure_name2, data = adult_HP)
## 
## $structure_name2
##                diff         lwr         upr     p adj
## CA2-CA1 -0.20512726 -0.43753190  0.02727738 0.1133456
## CA3-CA1 -0.06677176 -0.29917641  0.16563288 0.9572576
## CA4-CA1 -0.01541085 -0.25649490  0.22567321 0.9999654
## DG-CA1   0.51941436  0.31727285  0.72155587 0.0000000
## Sub-CA1  0.06571944 -0.16668520  0.29812408 0.9600192
## CA3-CA2  0.13835550 -0.11806923  0.39478022 0.6082369
## CA4-CA2  0.18971641 -0.07460014  0.45403297 0.2940266
## DG-CA2   0.72454162  0.49518837  0.95389487 0.0000000
## Sub-CA2  0.27084670  0.01442197  0.52727143 0.0325933
## CA4-CA3  0.05136092 -0.21295564  0.31567747 0.9924390
## DG-CA3   0.58618612  0.35683288  0.81553937 0.0000000
## Sub-CA3  0.13249120 -0.12393352  0.38891593 0.6512899
## DG-CA4   0.53482521  0.29668131  0.77296910 0.0000002
## Sub-CA4  0.08113029 -0.18318627  0.34544685 0.9437976
## Sub-DG  -0.45369492 -0.68304817 -0.22434167 0.0000038
  summary(aov(data=gdata, Expression ~ Error(Subfield/Species))) # Repeated-measures ANOVA
## 
## Error: Subfield
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals  4  3.273  0.8182               
## 
## Error: Subfield:Species
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals  5 0.3706 0.07411               
## 
## Error: Within
##           Df Sum Sq  Mean Sq F value Pr(>F)
## Residuals 35  0.257 0.007343

Wouldn’t necessarily trust the correlation matrices or Mantel test, since the there’s so few individuals per species. This is especially true of macaques, which only have 2-3 individuals.

Principal Component analyses: All Brain Regions: Human only

PCA <- prcomp(select(grouped, DG:Sub) %>% na.omit(), scale=TRUE, center=TRUE)
pander::pander(summary(PCA))
Principal Components Analysis
  PC1 PC2 PC3 PC4 PC5
DG 0.4779 -0.4905 0.0397 0.08903 0.7222
CA3 -0.5296 0.1214 -0.02066 0.7676 0.3394
CA2 -0.2616 -0.677 -0.6388 0.02188 -0.2543
CA1 -0.3578 -0.5215 0.7561 -0.07795 -0.1494
Sub -0.5429 0.1198 -0.1354 -0.6295 0.5256
  PC1 PC2 PC3 PC4 PC5
Standard deviation 1.609 1.131 0.7413 0.6295 0.4315
Proportion of Variance 0.5177 0.2559 0.1099 0.07926 0.03724
Cumulative Proportion 0.5177 0.7736 0.8835 0.9628 1
summPCA <- summary(PCA)

# Prep PCA data
grouped_omit <- na.omit(all)
Species <- grouped %>% na.omit() %>% select(Species) 
  levels(Species) <- c("Human", "Macaque")
DFforPlot <- data.frame(PC1=PCA$x[,1], PC2=PCA$x[,2], Species=Species)
  Human <- subset(DFforPlot, Species=="Human")
  Macaque <- subset(DFforPlot, Species=="Macaque")
# Plot PCA
ggplot(data=DFforPlot, aes(x=PC1, y=PC2, color=Species, fill=Species)) + 
  theme_bw(30) + 
  geom_polygon(aes(x=PC1, y=PC2), alpha=.3, data=Macaque[chull(Macaque$PC1, Macaque$PC2),]) +  
  geom_polygon(aes(x=PC1, y=PC2), alpha=.3, data=Human[chull(Human$PC1, Human$PC2),]) + 
  geom_point(size=5) + labs(title = paste(gene_name), x = paste("PC1","(", round(summPCA$importance[2,1]*100, 2), "%)"), y = paste("PC2","(", round(summPCA$importance[2,2]*100, 2), "%)"))

Can’t do interspecies analysis across all brain regions, because 1) not all subdivisions are present in both, 3) different names are often used, 3) humans have bilateral structures.

Can’t do human analysis because NO structure overlaps for all humans.


RNAseq

Developing Human

Adult Human

RNAadult <- filter(RNAseq, donor_age == "18 yrs"|donor_age == "19 yrs"|donor_age == "23 yrs"|donor_age == "30 yrs"|donor_age == "36 yrs"|donor_age == "37 yrs"|donor_age == "40 yrs") %>% droplevels()

# Top Structures: Average
ggplot(data = RNAadult, aes(x=top_level_structure_name, y=Expression, fill=top_level_structure_name)) + 
  stat_smooth(method="loess", aes(group=1), color="purple") +
  geom_bar(stat = "identity", position="dodge") +
  labs(title=paste("Adult Human",gene_name,"Expression: Top Structures"), x="Age", y="Expression Level")

# Structures: Average
ggplot(data = RNAadult, aes(x=structure_name, y=Expression, fill=structure_name)) + 
  geom_bar(stat = "identity", position="dodge") +
  labs(title=paste("Adult Human",gene_name,"Expression: Structures"), x="Age", y="Expression Level")+ theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Structure: Interindividual variability
ggplot(data = RNAadult, aes(x=donor_age, y=Expression, fill=structure_name)) + 
  stat_smooth(method="loess", aes(group=1), color="purple") +
  geom_bar(stat = "identity", position="dodge") +
  labs(title=paste("Adult Human",gene_name,"Expression: Structures"), x="Age", y="Expression Level")

# Hippocampus only
ggplot(data = filter(RNAadult, structure_name == "hippocampus (hippocampal formation)"), aes(x=donor_age, y=Expression, fill=structure_name)) + 
  geom_bar(stat = "identity", position="dodge") +
  labs(title=paste("Adult Human",gene_name,"Expression: Hippocampus"), x="Age", y="Expression Level")

RNAseq vs. Microarray concordance

Combine adult human data

Development

# Macaque microarray vs. Human RNAseq: all regions
RNAseq_all <- RNAseq %>% mutate(Species = "Human", Expression = rescale(Expression, c(-1,1))) %>% dplyr::select(Species, donor_age, Expression, structure_name)
  RNAseq_all$donor_age <- gsub(RNAseq_all$donor_age, pattern = "37 pcw", replacement = "0 mo")

micro_all <- microarray %>% filter(Species=="Macaque") %>% 
  mutate(Expression = rescale(Expression, c(-1,1))) %>% dplyr::select(Species, donor_age, Expression, structure_name)
  micro_all$donor_age <- gsub(micro_all$donor_age, pattern = "48 mo", replacement = "2 yrs")
  micro_all$donor_age <- gsub(micro_all$donor_age, pattern = "12 mo", replacement = "1 yrs")
RNA_micro_all <- rbind(RNAseq_all, micro_all)

ggplot(data = RNA_micro_all, aes(x=donor_age, y=rescale(Expression, c(-1, 1)), fill=structure_name)) + 
  stat_smooth(method="loess", aes(group=1), color="purple") + facet_wrap(~Species, nrow = 2) +
  geom_bar(stat = "identity", position="dodge") +
  labs(title=paste("Human",gene_name,"Expression Over Development: Structures"), x="Age", y="Expression Level") +
  scale_x_discrete(limits=c("E40", "E50", "8 pcw", "9 pcw","E70", "E80", "12 pcw", "E90", "13 pcw", "16 pcw", "17 pcw", "E120", "19 pcw", "21 pcw", "24 pcw", "25 pcw", "26 pcw", "35 pcw", "0 mo", "3 mo", "4 mos", "10 mos", "1 yrs", "2 yrs",  "3 yrs",  "4 yrs",  "8 yrs",  "11 yrs",  "13 yrs",  "15 yrs",  "18 yrs",  "19 yrs", "23 yrs",  "30 yrs", "36 yrs",  "37 yrs",  "40 yrs"))  + theme(legend.position = "none", legend.key.size = unit(.1, "cm"), axis.text.x = element_text(angle = 45, hjust = 1))

# Macaque microarray vs. Human RNAseq: hippocampus
RNAseq_HP <- RNAseq %>% filter(structure_name == "hippocampus (hippocampal formation)") %>% mutate(Species = "Human", Expression = rescale(Expression, c(-1,1))) %>% dplyr::select(Species, donor_age, Expression)
  RNAseq_HP$donor_age <- gsub(RNAseq_HP$donor_age, pattern = "37 pcw", replacement = "0 mo")

micro_HP <- microarray %>% filter(Species=="Macaque") %>% 
  mutate(Expression = rescale(Expression, c(-1,1))) %>% dplyr::select(Species, donor_age, Expression)
  micro_HP$donor_age <- gsub(micro_HP$donor_age, pattern = "48 mo", replacement = "2 yrs")
  micro_HP$donor_age <- gsub(micro_HP$donor_age, pattern = "12 mo", replacement = "1 yrs")
RNA_micro_HP <- rbind(RNAseq_HP, micro_HP)

gr_RNA_micro_HP <- RNA_micro_HP %>% group_by(Species, donor_age) %>% dplyr::summarise(Expression = mean(Expression, na.rm = TRUE))

ggplot(data = gr_RNA_micro_HP, aes(x=donor_age, y=rescale(Expression, c(-1, 1)), fill=Species)) + 
  stat_smooth(method="loess", aes(group=Species)) + facet_wrap(~Species, nrow = 2) +
  geom_bar(stat = "identity", position="dodge") +
  labs(title=paste("Human",gene_name,"Expression Over Development: Hippocampus"), x="Age", y="Expression Level") +
  scale_x_discrete(limits=c("E40", "E50", "8 pcw", "9 pcw","E70", "E80", "12 pcw", "E90", "13 pcw", "16 pcw", "17 pcw", "E120", "19 pcw", "21 pcw", "24 pcw", "25 pcw", "26 pcw", "35 pcw", "0 mo", "3 mo", "4 mos", "10 mos", "1 yrs", "2 yrs",  "3 yrs",  "4 yrs",  "8 yrs",  "11 yrs",  "13 yrs",  "15 yrs",  "18 yrs",  "19 yrs", "23 yrs",  "30 yrs", "36 yrs",  "37 yrs",  "40 yrs"))  + theme(text = element_text(size=10), axis.text.x = element_text(angle=45, vjust=0.5), legend.position = "top")

Adults

RNAedit <- RNAadult %>% select(donor_name, donor_age, structure_name, structure_abbreviation, top_level_structure_name, Expression) %>%   mutate(Method = "RNAseq")
microedit <- human %>% select(donor_name, donor_age, structure_name, structure_abbreviation, top_level_structure_name, Expression) %>%    mutate(Method = "microarray")
  RNA_micro <- rbind(RNAedit, microedit)
  RNA_micro <- merge(RNAedit, microedit, by = "structure_abbreviation")

Can’t compare across structures bc of different anatomical levels and nomenclature. But COULD compare within a structure (e.g. hippocampus) across many genes. Bioinformatics?